import numpy as np
import pandas as pd


path = r"D:\outsource\投票算法\Resolution list_20240125_test.xlsx"


# 
df = pd.read_excel(path, sheet_name='红色和无颜色填充', header=[0,1])


df.shape


df.dropna(how='all', inplace=True)
df.dropna(how='all', axis=1, inplace=True)


df.shape


# # 重置索引，以便更容易地操作DataFrame
# df_reset = df.reset_index()


# 创建映射，将投票选项映射到数值（赞成=1，弃权=0，反对=-1）
vote_mapping = {'Y': 1, 'A': 0, 'N': -1, 'V': -1}

# 获取所有需要映射的列（即投票记录的列）
vote_columns = list(df.iloc[:, 8:])


vote_columns[:10]


# 应用映射
for col in vote_columns:
    df[col] = df[col].map(vote_mapping)


df.iloc[:2, :20]


df.iloc[-2:, :20]


df.columns[2]


df.dropna(subset=[('Unnamed: 2_level_0', 'Resolution')], inplace=True)


df.reset_index(drop=True, inplace=True)


df[df[('Unnamed: 2_level_0', 'Resolution')] == "S/RES/2128(2013)"]


# df.iloc[:595]


# df.iloc[:2373].to_excel(r'D:\outsource\投票算法\q1_data.xlsx', freeze_panes=[2,0])


def calculate_s_score(votes_a, votes_b):
    """
    根据两个国家的联合国投票记录计算 S 分数。

    参数:
    votes_a (pd.Series): 国家 A 的投票记录，用数值表示 (1, 0, -1)。
    votes_b (pd.Series): 国家 B 的投票记录，用数值表示 (1, 0, -1)。

    返回:
    float: 代表两个国家投票记录相似度的 S 分数。
    """
    
    # 计算两个国家间的投票差异，并排除掉NaN值
    differences = votes_a - votes_b
    absolute_differences = differences.abs().dropna()
    
    # 计算S分数
    s_score = 1 - sum(absolute_differences) / len(absolute_differences)
    
    return s_score


df_q2 = df.iloc[:595].copy()


df_q2


df_q2.notnull().sum().loc[lambda x: x==0]


list(df_q2)[:20]


# 之前算的：0.9008857022353437, 0.9283305227655987, 0.9152613827993255





q2_records = []


China = ('Unnamed: 8_level_0', 'CHINA')
us = ('Unnamed: 12_level_0', 'UNITED STATES')
russian = ('Unnamed: 11_level_0', 'RUSSIAN FEDERATION(USSR)')
uk = ('Unnamed: 9_level_0', 'UNITED KINGDOM')
france = ('Unnamed: 10_level_0', 'FRANCE')


df_q2.loc[590:595, ('African Group', 'MOROCCO')] = np.nan


df_q2.loc[590:595, ('African Group', 'MOROCCO')]


# 1. 中国和非洲国家组
s_score_list = []
for c in df_q2['African Group'].columns[:]:
    col = ('African Group', c)
    
    if df_q2[col].notnull().sum() > 0:
        print(col)
        score = calculate_s_score(df_q2[China], df_q2[col])
        s_score_list.append(score)


s_score = np.sum(s_score_list) / len(s_score_list)


s_score


q2_records.append(['中国和非洲国家组', s_score])


# 2. 美国和非洲国家组
s_score_list2 = []
for c in df_q2['African Group'].columns[:]:
    col = ('African Group', c)
    
    if df_q2[col].notnull().sum() > 0:
        print(col)
        score = calculate_s_score(df_q2[us], df_q2[col])
        s_score_list2.append(score)


s_score2 = np.sum(s_score_list2) / len(s_score_list2)


s_score2


q2_records.append(['美国和非洲国家组', s_score2])


# 3. 俄罗斯和非洲国家组
s_score_list3 = []
for c in df_q2['African Group'].columns[:]:
    col = ('African Group', c)
    
    if df_q2[col].notnull().sum() > 0:
        print(col)
        score = calculate_s_score(df_q2[russian], df_q2[col])
        s_score_list3.append(score)


s_score3 = np.sum(s_score_list3) / len(s_score_list3)


s_score3


q2_records.append(['俄罗斯和非洲国家组', s_score3])


# 4. 英国和非洲国家组
s_score_list4 = []
for c in df_q2['African Group'].columns[:]:
    col = ('African Group', c)
    
    if df_q2[col].notnull().sum() > 0:
        print(col)
        score = calculate_s_score(df_q2[uk], df_q2[col])
        s_score_list4.append(score)


s_score4 = np.sum(s_score_list4) / len(s_score_list4)


s_score4


q2_records.append(['英国和非洲国家组', s_score4])


# 5. 法国和非洲国家组
s_score_list5 = []
for c in df_q2['African Group'].columns[:]:
    col = ('African Group', c)
    
    if df_q2[col].notnull().sum() > 0:
        print(col)
        score = calculate_s_score(df_q2[france], df_q2[col])
        s_score_list5.append(score)


s_score5 = np.sum(s_score_list5) / len(s_score_list5)


s_score5


q2_records.append(['法国和非洲国家组', s_score5])





pd.DataFrame(q2_records, columns=['项目', 's score'])


pd.DataFrame(q2_records, columns=['项目', 's score']).to_excel(r'D:\outsource\投票算法\Q2_res.xlsx', index=False, freeze_panes=[1, 0])


















